package me.itblog.quartz;


import com.rometools.rome.feed.synd.SyndCategory;
import com.rometools.rome.feed.synd.SyndContent;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;
import me.itblog.bean.Article;
import me.itblog.bean.AuthorProfile;
import me.itblog.bean.Category;
import me.itblog.services.ArticleLuceneService;
import me.itblog.services.CacheManager;
import me.itblog.utils.Constant;
import org.nutz.dao.Cnd;
import org.nutz.dao.Dao;
import org.nutz.ioc.loader.annotation.Inject;
import org.nutz.ioc.loader.annotation.IocBean;
import org.nutz.lang.Strings;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;

import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;

/**
 * Created by infi.he on 2015/10/27.
 */
@IocBean
public class RssFetchJob implements Job {

    private static final Log log = Logs.get();

    @Inject
    Dao dao;

    @Inject
    CacheManager cacheManager;

    @Inject
    ArticleLuceneService articleLuceneService;

    public void execute(JobExecutionContext context) throws JobExecutionException {
        log.debug("TestJob , start");
        List<AuthorProfile> authorProfileList = dao.query(AuthorProfile.class, Cnd.where("fetchType", "=", 1).and("isDelete", "=", 0));
        for (AuthorProfile authorProfile : authorProfileList) {
            try {
                log.info("开始抓取 authorProfile>>" + authorProfile.getName() + " Feed>>>" + authorProfile.getFeed() + " 开始");
                fetchAuthorProfile(authorProfile);
            } catch (Exception ex) {
                log.error("保存异常>>>>", ex);
            } finally {
                log.info("开始抓取authorProfile>>" + authorProfile.getName() + " Feed>>>" + authorProfile.getFeed() + " 完成");
            }
        }
        log.info("RssFetchJob_execute");
        log.debug("TestJob , Done");
    }

    private void fetchAuthorProfile(AuthorProfile authorProfile) throws Exception {

        URL feedUrl = new URL(authorProfile.getFeed());
        SyndFeedInput input = new SyndFeedInput();
        input.setAllowDoctypes(true);
        URLConnection urlConnection = feedUrl.openConnection();
        urlConnection.setConnectTimeout(Constant.fetch_connect_time_out);
        urlConnection.setReadTimeout(Constant.fetch_read_time_out);
        SyndFeed feed = input.build(new XmlReader(urlConnection));
        // 得到Rss新闻中子项列表
        List entries = feed.getEntries();
        // 循环得到每个子项信息
        log.info("开始抓取authorProfile>>" + authorProfile.getName() + "一共抓到size>>>" + entries.size());

        for (int i = 0; i < entries.size(); i++) {
            SyndEntry entry = (SyndEntry) entries.get(i);
            String link = entry.getLink();
            Article article = dao.fetch(Article.class, Cnd.where("link", "=", link));
            if (article == null) {
                article = new Article();
                log.info("开始抓取authorProfile>>" + authorProfile.getName() + "正在保存>>>" + i + "   :" + entry.getTitle());
                // 标题、连接地址、标题简介、时间是一个Rss源项最基本的组成部分
                article.setTitle(entry.getTitle());
                article.setLink(entry.getLink());

                SyndContent description = entry.getDescription();
                List<SyndContent> contents = entry.getContents();
                if (description == null && contents == null) {
                    continue;
                } else if(description == null) {
                    for (SyndContent content : contents) {
                        article.setContents(content.getValue());
                    }
                } else {
                    String value = description.getValue();

                    for (SyndContent content : contents) {
                        article.setContents(content.getValue());
                    }
                    //需要将描述作为内容的作者
                    if (authorProfile.getShowDescription() == 1) {
                        article.setContents(value);
                    } else {
                        if (value.length() > 500) {
                            article.setDescription(value.substring(0, 500));
                        }
                    }
                }

                if (Strings.isEmpty(entry.getAuthor())) {
                    article.setAuthorName(authorProfile.getName());
                } else {
                    article.setAuthorName(entry.getAuthor());
                }

                article.setPublishedDate(entry.getPublishedDate());

                // 此标题所属的范畴
                List categoryList = entry.getCategories();
                List<Category> categories = new ArrayList<Category>();
                StringBuilder sb = new StringBuilder();
                if (categoryList != null) {
                    for (int m = 0; m < categoryList.size(); m++) {
                        SyndCategory category = (SyndCategory) categoryList.get(m);
                        String name = category.getName();
                        if (Strings.isNotBlank(name)) {
                            Category dbCategory = dao.fetch(Category.class, Cnd.where("name", "=", name));
                            if (dbCategory == null) {
                                dbCategory = new Category();
                                dbCategory.setName(name);
                                dbCategory.setCnt(1);
                                dao.insert(dbCategory);
                            } else {
                                dbCategory.setCnt(dbCategory.getCnt() + 1);
                                dao.update(dbCategory, "cnt|updateTime");
                                categories.add(dbCategory);
                                sb.append(dbCategory.getId()).append("|").append(name).append(";");
                            }
                        }
                    }
                    if (categories.size() > 0) {
                        article.setCategoryList(categories);
                        log.info("categories>>>>" + sb.toString().length());
                        article.setCategorys(sb.toString());
                    }
                }

                article.setAuthorId(authorProfile.getId());

                try {
                    dao.insert(article);
                    dao.insertRelation(article, "categoryList");
                    cacheManager.cacheNewArtivle(article);
                    articleLuceneService.addArticle(article);
                } catch (Exception e) {
                    log.error("保存article异常,长度>>" + article.getContents().length() + ";" + e.getMessage());
                }
            } else {
                log.info("开始抓取authorProfile>>" + authorProfile.getName() + "已经存在，不用保存>>>" + i + "   :" + entry.getTitle());

            }
            log.info("开始抓取authorProfile>>" + authorProfile.getName() + "第[" + i + "]条>>> 保存完成");
        }
    }
}

